library(tidyverse) # For data manipulation and visualization
## Warning: package 'tidyverse' was built under R version 4.2.3
## Warning: package 'ggplot2' was built under R version 4.2.3
## Warning: package 'tibble' was built under R version 4.2.3
## Warning: package 'readr' was built under R version 4.2.3
## Warning: package 'dplyr' was built under R version 4.2.3
## Warning: package 'forcats' was built under R version 4.2.3
## Warning: package 'lubridate' was built under R version 4.2.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.1     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate) # For working with date-time data
library("ggplot2")
library(plotly)
## Warning: package 'plotly' was built under R version 4.2.3
## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout
library(leaflet)
library(leaflet.extras)
## Warning: package 'leaflet.extras' was built under R version 4.2.3
# Load data ----
# NOTE(review): the order file is assumed to be Latin-1 encoded. A later plot
# in this script is reported to fail on the "Mexico" entry, which points to
# accented country names being read with the wrong encoding. Declaring the
# encoding here makes read.csv() re-encode the text on import; confirm against
# the actual file.
data <- read.csv("DataCoSupplyChainDataset.csv", fileEncoding = "latin1")
# Column descriptions that accompany the order data.
data_description <- read.csv("DescriptionDataCoSupplyChain.csv")
# Summary statistics (min, quartiles, median, mean, max) for the shipping-time,
# benefit and sales columns
numeric_cols <- c(
  "Days.for.shipping..real.",
  "Days.for.shipment..scheduled.",
  "Benefit.per.order",
  "Sales.per.customer"
)
summary(data[numeric_cols])
##  Days.for.shipping..real. Days.for.shipment..scheduled. Benefit.per.order 
##  Min.   :0.000            Min.   :0.000                 Min.   :-4274.98  
##  1st Qu.:2.000            1st Qu.:2.000                 1st Qu.:    7.00  
##  Median :3.000            Median :4.000                 Median :   31.52  
##  Mean   :3.498            Mean   :2.932                 Mean   :   21.98  
##  3rd Qu.:5.000            3rd Qu.:4.000                 3rd Qu.:   64.80  
##  Max.   :6.000            Max.   :4.000                 Max.   :  911.80  
##  Sales.per.customer
##  Min.   :   7.49   
##  1st Qu.: 104.38   
##  Median : 163.99   
##  Mean   : 183.11   
##  3rd Qu.: 247.40   
##  Max.   :1939.99
# Histogram of sales per customer.
# `bins` is set explicitly to 30 (the value ggplot2 falls back to when none is
# given), so the plot is unchanged but the "Pick better value with `binwidth`"
# message is no longer emitted on every run.
ggplot(data, aes(x = Sales.per.customer)) +
  geom_histogram(bins = 30) +
  labs(
    x = "Sales per customer",
    y = "Count",
    title = "Distribution of Sales per Customer"
  )

# Bar chart: number of rows in each delivery-status category
data %>%
  ggplot(aes(x = Delivery.Status)) +
  geom_bar() +
  xlab("Delivery Status") +
  ylab("Count") +
  ggtitle("Delivery Status Distribution")

# Overlaid, semi-transparent density curves of Late_delivery_risk, one per
# delivery status.
# NOTE(review): Late_delivery_risk looks like a 0/1 flag, in which case a
# density estimate is hard to interpret - confirm the column's values.
data %>%
  ggplot(aes(x = Late_delivery_risk, fill = Delivery.Status)) +
  geom_density(alpha = 0.5) +
  ggtitle("Late Delivery Risk by Delivery Status")

# Box plots of Sales for each product category, filled by category
ggplot(data) +
  aes(x = Category.Name, y = Sales, fill = Category.Name) +
  geom_boxplot() +
  ggtitle("Category Name vs. Sales")

# Box plots of Late_delivery_risk for each shipping mode, filled by mode.
# NOTE(review): if Late_delivery_risk is a 0/1 flag, a per-mode proportion
# would be more informative than a box plot - confirm the column's values.
ggplot(
  data = data,
  mapping = aes(
    x = Shipping.Mode,
    y = Late_delivery_risk,
    fill = Shipping.Mode
  )
) +
  geom_boxplot() +
  ggtitle("Shipping Mode vs. Late Delivery Risk")

# Highest revenue by department ----

# Total sales per department. aggregate() names the summed column `x`.
sales_by_department <- aggregate(
  data$Sales,
  by = list(Department = data$Department.Name),
  FUN = sum
)
# Backward-compatible alias for the original (misspelled) object name.
sales_by_deapartmnet <- sales_by_department

# Build the bar chart; the `text` aesthetic carries the hover label.
d <- ggplot(
  sales_by_department,
  aes(
    x = Department,
    y = x,
    fill = Department,
    text = paste("Department: ", Department, "<br>", "Sales: $", x)
  )
) +
  geom_col() +
  labs(title = "Total Sales by Department") +
  xlab("Department") +
  ylab("Sales") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

# Convert the plot to plotly. tooltip = "text" shows only the custom hover
# label instead of every mapped aesthetic (which would repeat the department
# and expose the raw `x` column name).
ggplotly(d, tooltip = "text")
# Top 10 order countries by number of customer orders ----

# Count rows per Order.Country and keep the 10 most frequent.
orders_by_country <- data %>%
  group_by(Order.Country) %>%
  summarise(count = n()) %>%
  arrange(desc(count)) %>%
  slice_head(n = 10)

# NOTE(review): the original author reported an error in this output caused by
# the "Mexico" entry. Presumably the accented country name is read with the
# wrong encoding - confirm the CSV is imported with the correct fileEncoding.

# Build the bar chart. reorder() ranks the bars by order count; without it the
# character x-axis is drawn alphabetically and the ranking is lost.
p <- ggplot(
  orders_by_country,
  aes(
    x = reorder(Order.Country, -count),
    y = count,
    fill = Order.Country,
    text = paste("Country: ", Order.Country, "<br>", "Orders: ", count)
  )
) +
  geom_col() +
  labs(title = "Top 10 Order Countries by Customer Orders") +
  xlab("Order Country") +
  ylab("Customer Orders") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

# Convert the plot to plotly, showing only the custom hover label.
ggplotly(p, tooltip = "text")
# Number of customer orders per order region ----

# Count rows per Order.Region, most frequent first.
order_region_count <- data %>%
  group_by(Order.Region) %>%
  summarise(Count = n()) %>%
  arrange(desc(Count))

# Build the bar chart. The row order of the data frame does not affect a
# character x-axis (it is drawn alphabetically), so reorder() is used to rank
# the bars by order count.
# The object name `plot` is kept as in the original script.
plot <- ggplot(
  order_region_count,
  aes(
    x = reorder(Order.Region, -Count),
    y = Count,
    fill = Order.Region,
    text = paste(
      "Order Region: ", Order.Region, "<br>", "Order Count: ", Count
    )
  )
) +
  geom_col() +
  labs(title = "Order Regions by Count of Orders from Customers") +
  xlab("Order Region") +
  ylab("Order Count") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  scale_fill_viridis_d()

# Convert the plot to plotly, showing only the custom hover label.
ggplotly(plot, tooltip = "text")
# Top 20 customers by total sales ----

# Total sales per customer, highest first.
sales_per_customer <- data %>%
  group_by(Customer.Id) %>%
  summarise(total_sales = sum(Sales)) %>%
  arrange(desc(total_sales))

# Keep the 20 customers with the highest total sales.
top_customers <- head(sales_per_customer, 20)

# Name lookup with exactly one row per customer. `data` can hold many rows for
# the same Customer.Id (which is why sales are summed above). Joining against
# it directly would repeat each top customer once per matching row, and the
# bar chart would then stack total_sales that many times.
customer_names <- data %>%
  select(Customer.Id, Customer.Fname, Customer.Lname) %>%
  distinct(Customer.Id, .keep_all = TRUE)

# Attach the names and build a combined "first last" display name.
# NOTE(review): the original author flagged an unresolved "issue with row 1"
# here; it is unclear whether the de-duplication resolves it - verify.
# NOTE(review): two different customers sharing the same full name would still
# be merged into one bar below - confirm names are unique among the top 20.
top_customers <- top_customers %>%
  left_join(customer_names, by = "Customer.Id") %>%
  mutate(CustomerName = paste(Customer.Fname, Customer.Lname, sep = " "))

# Build the bar chart, ranked by total sales.
# The object name `plot` is kept as in the original script.
plot <- ggplot(
  top_customers,
  aes(
    x = reorder(CustomerName, -total_sales),
    y = total_sales,
    fill = CustomerName,
    text = paste(
      "Customer Name: ", CustomerName, "<br>", "Total Sales: $", total_sales
    )
  )
) +
  geom_col() +
  labs(title = "Top 20 Customers by Sales") +
  xlab("Customer Name") +
  ylab("Total Sales") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

# Convert the plot to plotly. tooltip = "text" shows only the custom hover
# label rather than the raw `reorder(...)` expression and every other
# aesthetic.
ggplotly(plot, tooltip = "text")
# Order heat map by customer city ----

# Count rows per (Customer.City, Latitude, Longitude), largest first.
# .groups = "drop" returns an ungrouped result and silences the
# "summarise() has grouped output" message.
Customer.City_orders <- data %>%
  group_by(Customer.City, Latitude, Longitude) %>%
  summarise(total_orders = n(), .groups = "drop") %>%
  arrange(desc(total_orders))

# Colour palette over the range of order counts.
# NOTE(review): this palette is not used by the heat map below; kept in case
# other code relies on it.
colorPalette <- colorNumeric(
  palette = "YlOrRd",
  domain = Customer.City_orders$total_orders
)

# Create the heat map, with the initial view centred on the US.
# Heat intensity is the order count, scaled against the largest count.
m <- leaflet(Customer.City_orders) %>%
  addTiles() %>%
  setView(lng = -95.7129, lat = 37.0902, zoom = 4) %>%
  addHeatmap(
    lng = ~Longitude,
    lat = ~Latitude,
    intensity = ~total_orders,
    blur = 20,
    max = max(Customer.City_orders$total_orders)
  )

m